# ===================================DATA MANIPULATION===================================
import pandas as pd
import numpy as np
import os
from datetime import datetime
import scipy.stats as stats
from copy import deepcopy
from pprint import pprint
import itertools
import warnings
import logging
logging.disable(logging.CRITICAL)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", message="invalid value encountered in subtract")
warnings.filterwarnings("ignore", message="Passing 'suffixes' which cause duplicate columns")
# =========================================PLOT==========================================
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
# =======================================MODELLING=======================================
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
import lime
import lime.lime_tabular
# ========================================SKLEARN========================================
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# =========================================KERAS=========================================
import math, time
from operator import itemgetter
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM
from keras.models import load_model
import keras
import h5py
import requests
stockcode_name = {
'03011':'AMLEX',
'7204':'DO',
'0128':'FRONTKN',
'0166':'INARI',
'0127':'JHM',
'9334':'KESM',
'0143':'KEYASIC',
'3867':'MPI',
'0070':'MQTECH',
'4359':'TURIYA',
'5005':'UNISEM',
'0120':'VIS',
'0097':'VITROX'
}
__path__ = "C:/Users/ji3yi/OneDrive - Universiti Malaya/Research Project/Production Code/data/yahoo_finance/monthly/"
os.chdir(__path__)
csv_files = [f for f in os.listdir() if f.endswith('.csv')]
for csv in csv_files:
stockcode = csv.split('.')[0]
for i in stockcode_name.keys():
name = stockcode_name.get(i).lower()
if stockcode == i:
globals()[f'df_{name}'] = pd.read_csv(csv)
exsig_name = {
'CAPUTLG3344S':'capacity_utilization_semicon',
'IY3344':'export_price_semicon',
'IZ3344':'import_price_semicon',
'PCU33443344':'ppi_semicon',
'CPIAUCSL':'cpi',
'DEXMAUS':'myr_to_usd',
'IPG3344S':'industrial_production_semicon',
'INDPRO':'industrial_production',
'PCU334413334413A':'ppi_other_semicon_chips_wafers_heatsinks',
'COINDUSZ3344':'import_price_origin',
'CES3133440001':'all_employees_semicon',
'ID8541':'export_price_index_harmonized',
'PCU3261993261992':'ppi_electrical_electronic_plastic',
'IP8542':'import_price_index_harmonized',
'VIXCLS':'cboe_volatility_index',
'A34SIS':'manufacturers_inventories_to_shipments'
}
__path__ = "C:/Users/ji3yi/OneDrive - Universiti Malaya/Research Project/Production Code/data/fred/monthly/"
os.chdir(__path__)
csv_files = [f for f in os.listdir() if f.endswith('.csv')]
df_exsig = pd.DataFrame()
for csv in csv_files:
exsig_code = csv.split('.')[0]
if exsig_code in exsig_name:
name = exsig_name[exsig_code].lower()
csv_path = os.path.join(__path__, csv)
try:
df = pd.read_csv(csv_path)
df = df.rename(columns={exsig_code: name})
globals()[f'df_{name}'] = df
except Exception as e:
print(f"Error reading {csv}: {e}")
df_exsig = pd.DataFrame()
df_exsig['ds'] = pd.date_range(start='2018-09-01', end='2023-09-01', freq='MS')
for exsig_code in exsig_name:
name = exsig_name[exsig_code].lower()
if f'df_{name}' in globals():
df_name = globals()[f'df_{name}']
if df_name['DATE'].duplicated().any():
df_name.drop_duplicates(subset='DATE', keep='first', inplace=True)
df_name.rename(columns={'DATE': 'ds'}, inplace=True)
df_name['ds'] = pd.to_datetime(df_name['ds'])
df_exsig = pd.merge(df_exsig, df_name, on='ds', how='left')
def wmape(y_true, y_pred):
    """Weighted MAPE: an adaptation of MAPE that avoids division by zero when individual actual values are zero."""
    return round((np.abs(y_true - y_pred).sum() / np.abs(y_true).sum()), 4)
def mae(y_true, y_pred):
    """Adapted version of MAE that avoids division by zero when there are no non-zero actual values."""
    return round((np.abs(y_true - y_pred).sum() / max(np.count_nonzero(y_true), 1)), 4)
def rmse(y_true, y_pred):
    """Adapted version of RMSE that avoids division by zero when there are no non-zero actual values."""
    return round(np.sqrt((np.square(y_true - y_pred).sum()) / max(np.count_nonzero(y_true), 1)), 4)
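# ==================================METRIC SANITY CHECK==================================
# Illustrative only, not part of the original pipeline: a quick check of the metric
# helpers on hypothetical toy arrays (the _demo_* names below are placeholders).
_y_true_demo = np.array([1.0, 2.0, 0.0, 4.0])
_y_pred_demo = np.array([1.1, 1.8, 0.2, 3.5])
# wmape divides by the sum of |actuals|; mae/rmse divide by the count of non-zero
# actuals (3 here), so a single zero actual does not blow up the score.
print(wmape(_y_true_demo, _y_pred_demo))  # ~0.1429
print(mae(_y_true_demo, _y_pred_demo))    # ~0.3333
print(rmse(_y_true_demo, _y_pred_demo))   # ~0.3367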
for i in stockcode_name.keys():
name = stockcode_name.get(i).lower()
df_temp = globals()[f'df_{name}']
df_temp['Date'] = pd.to_datetime(df_temp['Date'])
df_temp['year'] = df_temp['Date'].dt.year
df_temp['month'] = df_temp['Date'].dt.month
df_temp['day'] = df_temp['Date'].dt.day
def getDataset(stockname, settings):
start = datetime.now()
data = globals()[f'df_{stockname.lower()}'].copy()
data['Date'] = pd.to_datetime(data['Date'])
df_exsig['ds'] = pd.to_datetime(df_exsig['ds'])
external_signals = {}
ext_signal_info = settings['hyperparameters']['external_signals']
ori_ds_col = data['Date'].copy()
for k in ext_signal_info.keys():
        if k == 'external_signals_combo':
            break  # 'external_signals_combo' is always the last key; the entries before it are the actual signals
external_signals[k] = df_exsig[['ds', k]].copy()
external_signals[k].rename(columns={ext_signal_info[k]['date_col']: 'date_col', ext_signal_info[k]['value_col']: k},
inplace=True)
for k in external_signals.keys():
external_signals[k]['month'] = pd.PeriodIndex(external_signals[k]['date_col'], freq='M') + ext_signal_info[k]['lags']
external_signals[k]['ds'] = pd.PeriodIndex(external_signals[k]['month']).to_timestamp()
external_signals[k] = external_signals[k][['ds', k]]
start_month = min(data['Date'].to_list())
merged_cols = set()
for k in external_signals.keys():
if k not in merged_cols:
data = data.merge(external_signals[k], how='left', left_on='Date', right_on='ds')
merged_cols.add(k)
data.rename(columns={'Date':'ds', 'Adj Close':'y'}, inplace=True)
rel_columns = [col for col in data.columns if col in external_signals.keys() or
col in ['ds','y','year','month']]
data = data[rel_columns]
data = data.loc[:,~data.columns.duplicated()]
return data
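# Illustrative only: how the lag handling inside getDataset aligns an external signal.
# With lags=2, a value observed in 2020-01 is merged onto the 2020-03 stock observation.
_demo_period = pd.PeriodIndex(['2020-01-01'], freq='M') + 2
# _demo_period.to_timestamp()[0] == pd.Timestamp('2020-03-01')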
def categorize(array, q1, q3):
if array < q1:
return 'Low'
elif array < q3:
return 'Medium'
elif array == np.inf:
return np.nan
else:
return 'High'
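# Illustrative only (hypothetical PE values): categorize() bins a PE ratio against the
# quartile thresholds that calculate_pe_ratio_train/_test compute further below.
_pe_demo = pd.Series([8.0, 12.0, 15.0, 22.0, 40.0])
_q1_demo, _q3_demo = np.percentile(_pe_demo, 25), np.percentile(_pe_demo, 75)
print(_pe_demo.apply(lambda x: categorize(x, _q1_demo, _q3_demo)).tolist())
# -> ['Low', 'Medium', 'Medium', 'High', 'High'] for these toy numbers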
def evaluate(model, test_features, test_labels):
predictions = model.predict(test_features)
errors = abs(predictions - test_labels)
mape = 100 * np.mean(errors / test_labels)
accuracy = 100 - mape
    def wmape(y_true, y_pred):
        """Local copy of the module-level helper: weighted MAPE that avoids division by zero when individual actual values are zero."""
        return round((np.abs(y_true - y_pred).sum() / np.abs(y_true).sum()), 4)
w_mape = wmape(test_labels, predictions)
# print()
# print('Model Performance: ')
# print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
# print('Accuracy = {:0.2f}%.'.format(accuracy))
# print('test mape: {:0.4f}'.format(w_mape))
return accuracy
def split_train_test_rf(settings, data):
external_signals = {}
ext_signal_info = settings['hyperparameters']['external_signals']
for k in ext_signal_info.keys():
if k == 'external_signals_combo':
break
external_signals[k] = df_exsig[['ds', k]].copy()
external_signals[k].rename(columns={ext_signal_info[k]['date_col']: 'date_col', ext_signal_info[k]['value_col']: k},
inplace=True)
rel_columns = [col for col in data.columns if col in ['year','month'] or col in external_signals.keys()]
data = data.fillna(0)
X = data[rel_columns]
y = data['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = min_max_scaler.fit_transform(X_train)
X_test_scaled = min_max_scaler.transform(X_test)
return rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled
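# Illustrative only: because shuffle=False, train_test_split keeps chronological order,
# so the last 20% of months form the held-out test set (hypothetical 61-row frame below).
_demo_X = pd.DataFrame({'year': [2018] * 61, 'month': range(61)})
_demo_y = pd.Series(range(61))
_tr_demo, _te_demo, _, _ = train_test_split(_demo_X, _demo_y, test_size=0.2, random_state=42, shuffle=False)
# len(_tr_demo) == 48 and len(_te_demo) == 13; _te_demo holds the 13 most recent rows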
def train_predict_rf(X_train_scaled, y_train, X_test_scaled, y_test):
# print("=======================================BASE MODEL=======================================")
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)
predictions_train = rf.predict(X_train_scaled)
predictions_test = rf.predict(X_test_scaled)
# mse_train = mean_squared_error(y_train, predictions_train)
# mse_test = mean_squared_error(y_test, predictions_test)
# print('Base Model Performance: ')
mae_train = mae(y_train, predictions_train)
mae_test = mae(y_test, predictions_test)
# print('Train MAE: %.4f' % (mae_train))
# print('Test MAE: %.4f' % (mae_test))
rmse_train = rmse(y_train, predictions_train)
rmse_test = rmse(y_test, predictions_test)
# print('Train RMSE: %.4f' % (rmse_train))
# print('Test RMSE: %.4f' % (rmse_test))
mape_train = wmape(y_train, predictions_train)
mape_test = wmape(y_test, predictions_test)
# print('Train MAPE: %.4f' % (mape_train))
# print('Test MAPE: %.4f' % (mape_test))
residuals = y_test - predictions_test
# print(f'Averaged Residuals: ', np.mean(np.abs(residuals)))
# print()
return rf, predictions_train, predictions_test
def train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid):
# print("=======================================BASE MODEL=======================================")
rf = RandomForestRegressor(**random_grid)
rf.fit(X_train_scaled, y_train)
predictions_train = rf.predict(X_train_scaled)
predictions_test = rf.predict(X_test_scaled)
# mse_train = mean_squared_error(y_train, predictions_train)
# mse_test = mean_squared_error(y_test, predictions_test)
# print('Base Model Performance: ')
mae_train = mae(y_train, predictions_train)
mae_test = mae(y_test, predictions_test)
# print('Train MAE: %.4f' % (mae_train))
# print('Test MAE: %.4f' % (mae_test))
rmse_train = rmse(y_train, predictions_train)
rmse_test = rmse(y_test, predictions_test)
# print('Train RMSE: %.4f' % (rmse_train))
# print('Test RMSE: %.4f' % (rmse_test))
mape_train = wmape(y_train, predictions_train)
mape_test = wmape(y_test, predictions_test)
# print('Train MAPE: %.4f' % (mape_train))
# print('Test MAPE: %.4f' % (mape_test))
residuals = y_test - predictions_test
# print(f'Averaged Residuals: ', np.mean(np.abs(residuals)))
# print()
return rf, predictions_train, predictions_test
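# ================================HYPERPARAMETER SEARCH SKETCH================================
# Illustrative sketch only: the tuned `random_grid` dicts used further below are given as
# literals; one plausible way to obtain such values is a randomized search with a
# time-series-aware split (RandomizedSearchCV and TimeSeriesSplit are imported above).
# `search_rf_hyperparameters` is a hypothetical helper, not part of the original pipeline.
def search_rf_hyperparameters(X_train_scaled, y_train, n_iter=20):
    param_distributions = {
        'n_estimators': [100, 600, 1600, 2000],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 1.0],
        'max_depth': [40, 70, 110, None],
        'bootstrap': [True, False],
    }
    search = RandomizedSearchCV(
        RandomForestRegressor(random_state=42),
        param_distributions=param_distributions,
        n_iter=n_iter,
        cv=TimeSeriesSplit(n_splits=3),   # respects temporal order during cross-validation
        scoring='neg_mean_absolute_error',
        random_state=42,
        n_jobs=-1,
    )
    search.fit(X_train_scaled, y_train)
    return search.best_params_            # a dict in the same shape as `random_grid`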
def plot_base_prediction_static_rf(min_max_scaler, X_train_scaled, y_train, predictions_train,
X_test_scaled, y_test, predictions_test, stockname):
# print("======================================PLOT BASE MODEL======================================")
df_plot_train = pd.DataFrame(min_max_scaler.inverse_transform(X_train_scaled)[:,:2]).rename(columns={0:'year', 1:'month'})
df_plot_train = pd.concat([df_plot_train,pd.DataFrame(y_train),pd.DataFrame(predictions_train)],axis=1).rename(columns={0:'yhat1'})
df_plot_train['y'] = df_plot_train['y'].replace(0,df_plot_train['y'].mean())
df_plot_train[['year','month']] = df_plot_train[['year','month']].round().astype(int)
df_plot_train['ds'] = df_plot_train['year'].astype(str) + '-' + df_plot_train['month'].astype(str)
df_plot_train['ds'] = pd.to_datetime(df_plot_train['ds'])
df_plot_train.set_index('ds', inplace=True)
df_plot_test = pd.DataFrame(min_max_scaler.inverse_transform(X_test_scaled)[:,:2]).rename(columns={0:'year', 1:'month'})
df_plot_test = pd.concat([df_plot_test,pd.DataFrame(np.array([y_test]).reshape(-1,1))],axis=1).rename(columns={0:'y'})
df_plot_test = pd.concat([df_plot_test,pd.DataFrame(predictions_test)],axis=1).rename(columns={0:'yhat1'})
df_plot_test['y'] = df_plot_test['y'].replace(0,df_plot_test['y'].mean())
df_plot_test[['year','month']] = df_plot_test[['year','month']].round().astype(int)
df_plot_test['ds'] = df_plot_test['year'].astype(str) + '-' + df_plot_test['month'].astype(str)
df_plot_test['ds'] = pd.to_datetime(df_plot_test['ds'])
df_plot_test.set_index('ds', inplace=True)
# plt.plot(df_plot_train.index, df_plot_train['y'], label='actual train')
# plt.plot(df_plot_test['y'], label='actual test')
# plt.plot(df_plot_train['yhat1'], label='predicted train')
# plt.plot(df_plot_test['yhat1'], label='predicted test')
# plt.title(f'Random Forest {stockname.upper()}')
# plt.legend()
# plt.show()
return df_plot_train, df_plot_test
def plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled, y_test, predictions_test, stockname):
    # print("====================================PLOT TUNED MODEL====================================")
    # NOTE: relies on the module-level `min_max_scaler` produced by split_train_test_rf.
    df_plot_train = pd.DataFrame(min_max_scaler.inverse_transform(X_train_scaled)[:,:2]).rename(columns={0:'year', 1:'month'})
df_plot_train = pd.concat([df_plot_train,pd.DataFrame(y_train),pd.DataFrame(predictions_train)],axis=1).rename(columns={0:'yhat1'})
df_plot_train['y'] = df_plot_train['y'].replace(0,df_plot_train['y'].mean())
df_plot_train[['year','month']] = df_plot_train[['year','month']].round().astype(int)
df_plot_train['ds'] = df_plot_train['year'].astype(str) + '-' + df_plot_train['month'].astype(str)
df_plot_train['ds'] = pd.to_datetime(df_plot_train['ds'])
df_plot_train.set_index('ds', inplace=True)
df_plot_test = pd.DataFrame(min_max_scaler.inverse_transform(X_test_scaled)[:,:2]).rename(columns={0:'year', 1:'month'})
df_plot_test = pd.concat([df_plot_test,pd.DataFrame(np.array([y_test]).reshape(-1,1))],axis=1).rename(columns={0:'y'})
df_plot_test = pd.concat([df_plot_test,pd.DataFrame(predictions_test)],axis=1).rename(columns={0:'yhat1'})
df_plot_test['y'] = df_plot_test['y'].replace(0,df_plot_test['y'].mean())
df_plot_test[['year','month']] = df_plot_test[['year','month']].round().astype(int)
df_plot_test['ds'] = df_plot_test['year'].astype(str) + '-' + df_plot_test['month'].astype(str)
df_plot_test['ds'] = pd.to_datetime(df_plot_test['ds'])
df_plot_test.set_index('ds', inplace=True)
# plt.plot(df_plot_train.index, df_plot_train['y'], label='actual train')
# plt.plot(df_plot_test['y'], label='actual test')
# plt.plot(df_plot_train['yhat1'], label='predicted train')
# plt.plot(df_plot_test['yhat1'], label='predicted test')
# plt.title(f'Random Forest {stockname.upper()}')
# plt.legend()
# plt.show()
return df_plot_train, df_plot_test
def compute_lime_rf(X_train, X_test, rf_tuned, rel_columns):
# print("==========================================LIME==========================================")
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns.values.tolist(),
class_names=['Adj Close'], verbose=False, mode='regression')
j = 5
print(X_test.values[j].shape)
exp = explainer.explain_instance(X_test.values[j], rf_tuned.predict, num_features=len(rel_columns))
exp.show_in_notebook(show_table=True)
print(exp.as_list())
def create_dataset(dataset, look_back=None):
dataX, dataY = [], []
for i in range(len(dataset)-look_back-1):
a = dataset[i:(i+look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return np.array(dataX), np.array(dataY)
def create_dataset_with_features(dataset, look_back=None):
dataX, dataY = [], []
for i in range(len(dataset)-look_back-1):
a = dataset[i:(i + look_back), 1:]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return np.array(dataX), np.array(dataY)
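# Illustrative only: how the windowing helpers slice a toy series with look_back=2
# (the _demo_* names are placeholders). create_dataset uses column 0 as both the input
# window and the target; create_dataset_with_features uses columns 1: as inputs and
# column 0 as the target.
_demo_series = np.arange(10, dtype=float).reshape(-1, 1)   # [[0.], [1.], ..., [9.]]
_demo_X_lb, _demo_y_lb = create_dataset(_demo_series, look_back=2)
# _demo_X_lb[0] == [0., 1.] and _demo_y_lb[0] == 2.0; the last target is 8.0 because
# the `-1` in the range leaves the final row out, matching the original code.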
def split_train_test_lstm(data, look_back=None):
data.set_index('ds', inplace=True)
data['ds'] = data.index
data['ds'] = pd.to_datetime(data['ds'])
rel_columns = [col for col in data.columns if col not in ['ds','y']]
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
dataset = min_max_scaler.fit_transform(data['y'].values.reshape(-1, 1))
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
x_train, y_train = create_dataset(train, look_back)
x_test, y_test = create_dataset(test, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
return rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset
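# Illustrative only (hypothetical toy frame): trace the shapes produced by
# split_train_test_lstm for a 60-month series with look_back=3.
_demo_df = pd.DataFrame({'ds': pd.date_range('2018-09-01', periods=60, freq='MS'),
                         'y': np.random.rand(60)})
_demo_split = split_train_test_lstm(_demo_df.copy(), look_back=3)
# _demo_split[6].shape == (44, 1, 3)  # x_train: 48 train rows -> 48 - 3 - 1 windows
# _demo_split[8].shape == (8, 1, 3)   # x_test:  12 test rows -> 12 - 3 - 1 windows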
def train_predict_lstm_lookback(x_train, y_train, x_test, y_test, min_max_scaler, dataset, stockname, look_back=None):
model = Sequential()
model.add(LSTM(20, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=20, batch_size=1, verbose=0)
trainPredict = model.predict(x_train)
testPredict = model.predict(x_test)
trainPredict = trainPredict.reshape(-1, 1)
testPredict = testPredict.reshape(-1, 1)
# invert predictions
trainPredict = min_max_scaler.inverse_transform(trainPredict)
trainY = min_max_scaler.inverse_transform([y_train])
testPredict = min_max_scaler.inverse_transform(testPredict)
testY = min_max_scaler.inverse_transform([y_test])
# calculate root mean squared error
# print()
trainRMSE = rmse(trainY[0], trainPredict[:,0])
testRMSE = rmse(testY[0], testPredict[:,0])
# print('Train RMSE: %.4f' % (trainRMSE))
# print('Test RMSE: %.4f' % (testRMSE))
trainMAE = mae(trainY[0], trainPredict[:,0])
testMAE = mae(testY[0], testPredict[:,0])
# print('Train MAE: %.4f' % (trainMAE))
# print('Test MAE: %.4f' % (testMAE))
trainMAPE = wmape(trainY[0], trainPredict[:,0])
testMAPE = wmape(testY[0], testPredict[:,0])
# print('Train MAPE: %.4f' % (trainMAPE))
# print('Test MAPE: %.4f' % (testMAPE))
# print()
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
# plt.plot(min_max_scaler.inverse_transform(dataset), label='actual')
# plt.plot(trainPredictPlot, label='train')
# plt.plot(testPredictPlot, label='test')
# plt.legend()
# plt.title(f'LSTM {stockname.upper()}')
# plt.show()
return trainPredict, trainY, testPredict, testY
def train_predict_lstm_exsig(data, look_back=None):
    # NOTE: relies on the module-level `settings` dict for the external-signal columns.
    data = data.fillna(0)
data.set_index('ds', inplace=True)
data['ds'] = data.index
data['ds'] = pd.to_datetime(data['ds'])
y_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
dataset = y_scaler.fit_transform(data['y'].values.reshape(-1, 1))
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
# train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# Extracting features from settings
features = []
features_key = []
for key in settings["hyperparameters"]["external_signals"]:
if key != "external_signals_combo":
feature_data = settings["hyperparameters"]["external_signals"][key]["value_col"]
features.append(data[feature_data].values.reshape(-1, 1))
features_key.append(key)
# Concatenating features along the second axis
if len(features) > 0:
dataset_with_features = np.concatenate([dataset] + features, axis=1)
else:
dataset_with_features = dataset # Use only the time series data if no features are present
# Splitting the dataset with features
train, test = dataset_with_features[0:train_size, :], dataset_with_features[train_size:len(dataset), :]
# Separating features and target for scaling
train_features = train[:, 1:] # Exclude the target column
test_features = test[:, 1:]
# Scaling features
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
train_features_scaled = min_max_scaler.fit_transform(train_features)
test_features_scaled = min_max_scaler.transform(test_features) # Use transform only for test set
# Concatenating scaled features with target column
train_scaled = np.concatenate((train[:, :1], train_features_scaled), axis=1)
test_scaled = np.concatenate((test[:, :1], test_features_scaled), axis=1)
# Creating datasets
x_train, y_train = create_dataset_with_features(train_scaled, look_back)
x_test, y_test = create_dataset_with_features(test_scaled, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], len(features_key)))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], len(features_key)))
    look_back = 1  # the LSTM below assumes a single time step per sample
model = Sequential()
model.add(LSTM(20, input_shape=(1, x_train.shape[2]))) # Adjust input shape to include features
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=20, batch_size=1, verbose=0)
trainPredict = model.predict(x_train)
testPredict = model.predict(x_test)
return y_scaler, train_size, test_size, train, test, min_max_scaler, train_features_scaled, test_features_scaled, train_scaled, test_scaled, x_train, y_train, x_test, y_test, trainPredict, testPredict, model, features_key
def plot_prediction_static_lstm(y_scaler, y_train, y_test, trainPredict, testPredict, stockname):
# print("==========================================PLOT==========================================")
train = pd.DataFrame(y_scaler.inverse_transform(y_train.reshape(-1,1)), columns=['y'])
test = pd.DataFrame(y_scaler.inverse_transform(y_test.reshape(-1,1)), columns=['y'])
train_pred = pd.DataFrame(y_scaler.inverse_transform(trainPredict.reshape(-1,1)), columns=['yhat1'])
test_pred = pd.DataFrame(y_scaler.inverse_transform(testPredict.reshape(-1,1)), columns=['yhat1'])
train['ds'] = data.iloc[:train_size, data.columns.get_loc('ds')].reset_index(drop=True)
test['ds'] = data.iloc[train_size + 1:, data.columns.get_loc('ds')].reset_index(drop=True)
train_pred['ds'] = data.iloc[:train_size, data.columns.get_loc('ds')].reset_index(drop=True)
test_pred['ds'] = data.iloc[train_size + 1:, data.columns.get_loc('ds')].reset_index(drop=True)
train.set_index('ds', inplace=True)
test.set_index('ds', inplace=True)
train_pred.set_index('ds', inplace=True)
test_pred.set_index('ds', inplace=True)
# plt.plot(train, label='actual train')
# plt.plot(test, label='actual test')
# plt.plot(train_pred, label='predicted train')
# plt.plot(test_pred, label='predicted test')
# plt.legend()
# plt.title(f'LSTM {stockname.upper()}')
# plt.show()
print()
trainRMSE = rmse(train['y'], train_pred['yhat1'])
testRMSE = rmse(test['y'], test_pred['yhat1'])
# print('Train RMSE: %.4f' % (trainRMSE))
# print('Test RMSE: %.4f' % (testRMSE))
trainMAE = mae(train['y'], train_pred['yhat1'])
testMAE = mae(test['y'], test_pred['yhat1'])
# print('Train MAE: %.4f' % (trainMAE))
# print('Test MAE: %.4f' % (testMAE))
trainMAPE = wmape(train['y'], train_pred['yhat1'])
testMAPE = wmape(test['y'], test_pred['yhat1'])
# print('Train MAPE: %.4f' % (trainMAPE))
# print('Test MAPE: %.4f' % (testMAPE))
# print()
df_plot_train = pd.concat([train, train_pred], axis=1)
df_plot_test = pd.concat([test, test_pred], axis=1)
return train, test, train_pred, test_pred, df_plot_train, df_plot_test
def calculate_pe_ratio_train(df_plot_train, df_eps_train):
# ==================================================TRAIN=======================================================
df_plot_train['quarter'] = pd.PeriodIndex(df_plot_train.index, freq='Q').quarter
df_plot_train['year'] = pd.PeriodIndex(df_plot_train.index, freq='Q').year
df_eps_train = pd.merge(df_plot_train, df_eps_train, left_on=['year','quarter'], right_on=['year','quarter'])
df_eps_train.index = df_plot_train.index
df_eps_train['PE'] = df_eps_train['y']/df_eps_train['eps']
q1 = np.percentile(df_eps_train['PE'], 25)
q3 = np.percentile(df_eps_train['PE'], 75)
df_eps_train['category'] = df_eps_train['PE'].apply(lambda x: categorize(x, q1, q3))
return df_eps_train
def calculate_pe_ratio_test(df_plot_test, df_eps_test):
# ==================================================TEST========================================================
df_plot_test['quarter'] = pd.PeriodIndex(df_plot_test.index, freq='Q').quarter
df_plot_test['year'] = pd.PeriodIndex(df_plot_test.index, freq='Q').year
df_eps_test = pd.merge(df_plot_test, df_eps_test, left_on=['year','quarter'], right_on=['year','quarter'])
df_eps_test.index = df_plot_test.index
df_eps_test['PE'] = df_eps_test['y']/df_eps_test['eps']
q1 = np.percentile(df_eps_test['PE'], 25)
q3 = np.percentile(df_eps_test['PE'], 75)
df_eps_test['category'] = df_eps_test['PE'].apply(lambda x: categorize(x, q1, q3))
return df_eps_test
def plot_decision_static(df_eps_train, df_eps_test, stockname):
# ==================================================PLOT========================================================
category_colors = {'Low': 'green', 'Medium': 'orange', 'High': 'red'}
category_markers = {'Low': 'o', 'Medium': '^', 'High': 's'}
plt.figure(figsize=(8, 6))
df = pd.concat([df_eps_train,df_eps_test], axis=0)
for category, group in df.groupby('category'):
plt.scatter(group.index, group['yhat1'], label=category, color=category_colors[category],
marker=category_markers[category], s=50)
plt.xlabel('Date')
plt.ylabel('Predicted Adjusted Close Price')
plt.title(f'From {min(df.index.date)} to {max(df.index.date)} for {stockname.upper()}')
plt.legend()
plt.grid(True)
plt.show()
def plot_decision_interactive(df_eps_train, df_eps_test, stockname):
matplotlib_to_plotly_symbols = {'o': 'circle', '^': 'triangle-up', 's': 'square'}
category_markers = {'Low': 'o', 'Medium': '^', 'High': 's'}
category_plotly_symbols = {k: matplotlib_to_plotly_symbols[v] for k, v in category_markers.items()}
category_colors = {'Low': 'green', 'Medium': 'orange', 'High': 'red'}
df = pd.concat([df_eps_train, df_eps_test], axis=0)
traces = []
for category, group in df.groupby('category'):
trace = go.Scatter(
x=group.index,
y=group['yhat1'],
mode='markers',
name=category,
marker=dict(
color=category_colors[category],
symbol=category_plotly_symbols[category],
size=10,
),
)
traces.append(trace)
secondary_trace_pe = go.Scatter(
x=df.index,
y=df['PE'], # Assuming 'PE' is the column for the secondary axis
name='PE',
yaxis='y2', # Assigning this trace to a secondary y-axis 'y2'
mode='lines'
)
traces.append(secondary_trace_pe)
layout = go.Layout(
title=f'From {min(df.index.date)} to {max(df.index.date)} for {stockname.upper()}',
xaxis=dict(title='Date'),
yaxis=dict(title='Predicted Adjusted Close Price'),
yaxis2=dict(
title='PE Ratio', # Title for the secondary y-axis
overlaying='y', # Overlay the secondary y-axis on the primary y-axis
side='right', # Display the secondary axis on the right side
position=0.98, # Adjust position as needed (0-1)
),
showlegend=True,
)
fig = go.Figure(data=traces, layout=layout)
fig.show()
stockname = 'amlex'
settings = {
"hyperparameters": {
"external_signals": {
"export_price_semicon": {
"date_col":"ds",
"value_col":"export_price_semicon",
"lags":2,
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":3,
},
"external_signals_combo": [
[],
[
"export_price_semicon"
],
[
"manufacturers_inventories_to_shipments"
],
[
"export_price_semicon",
"manufacturers_inventories_to_shipments"
]
]
}
}
}
random_grid = {'n_estimators': 600,
'min_samples_split': 5,
'min_samples_leaf': 2,
'max_features': 'auto',
'max_depth': 70,
'bootstrap': False}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
amlex_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0},
{'year': 2018, 'quarter': 2, 'eps': 0},
{'year': 2018, 'quarter': 3, 'eps': 0},
{'year': 2018, 'quarter': 4, 'eps': 0},
{'year': 2019, 'quarter': 1, 'eps': -0.01},
{'year': 2019, 'quarter': 2, 'eps': 0},
{'year': 2019, 'quarter': 3, 'eps': 0.11},
{'year': 2019, 'quarter': 4, 'eps': 0},
{'year': 2020, 'quarter': 1, 'eps': -0.86},
{'year': 2020, 'quarter': 2, 'eps': 0},
{'year': 2020, 'quarter': 3, 'eps': 0},
{'year': 2020, 'quarter': 4, 'eps': 0},
{'year': 2021, 'quarter': 1, 'eps': 0.31},
{'year': 2021, 'quarter': 2, 'eps': 0},
{'year': 2021, 'quarter': 3, 'eps': 0.95},
{'year': 2021, 'quarter': 4, 'eps': 0},
{'year': 2022, 'quarter': 1, 'eps': 2.43},
{'year': 2022, 'quarter': 2, 'eps': 0},
{'year': 2022, 'quarter': 3, 'eps': 2.16},
{'year': 2022, 'quarter': 4, 'eps': 0},
{'year': 2023, 'quarter': 1, 'eps': 1.09},
{'year': 2023, 'quarter': 2, 'eps': 0},
{'year': 2023, 'quarter': 3, 'eps': 0.04},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0.17}
]
df_eps_train = pd.DataFrame(amlex_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(amlex_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'do'
settings = {
"hyperparameters": {
"external_signals": {
"industrial_production_semicon": {
"date_col":"ds",
"value_col":"industrial_production_semicon",
"lags":0
},
"export_price_index_harmonized": {
"date_col":"ds",
"value_col":"export_price_index_harmonized",
"lags":0
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":0
},
"external_signals_combo": [
[],
[
"industrial_production_semicon"
],
[
"export_price_index_harmonized"
],
[
"manufacturers_inventories_to_shipments"
],
[
"industrial_production_semicon",
"export_price_index_harmonized",
"manufacturers_inventories_to_shipments"
]
]
}
}
}
random_grid = {'n_estimators': 2000,
'min_samples_split': 10,
'min_samples_leaf': 2,
'max_features': 'auto',
'max_depth': 40,
'bootstrap': True}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
do_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.51},
{'year': 2018, 'quarter': 2, 'eps': 0.78},
{'year': 2018, 'quarter': 3, 'eps': 1.00},
{'year': 2018, 'quarter': 4, 'eps': 1.12},
{'year': 2019, 'quarter': 1, 'eps': 0.55},
{'year': 2019, 'quarter': 2, 'eps': 0.37},
{'year': 2019, 'quarter': 3, 'eps': 0.81},
{'year': 2019, 'quarter': 4, 'eps': 1.07},
{'year': 2020, 'quarter': 1, 'eps': 0.35},
{'year': 2020, 'quarter': 2, 'eps': 0.06},
{'year': 2020, 'quarter': 3, 'eps': 1.30},
{'year': 2020, 'quarter': 4, 'eps': 2.43},
{'year': 2021, 'quarter': 1, 'eps': 2.28},
{'year': 2021, 'quarter': 2, 'eps': 2.04},
{'year': 2021, 'quarter': 3, 'eps': 1.56},
{'year': 2021, 'quarter': 4, 'eps': 2.96},
{'year': 2022, 'quarter': 1, 'eps': 2.47},
{'year': 2022, 'quarter': 2, 'eps': 1.00},
{'year': 2022, 'quarter': 3, 'eps': 1.27},
{'year': 2022, 'quarter': 4, 'eps': 0.94},
{'year': 2023, 'quarter': 1, 'eps': 0.07},
{'year': 2023, 'quarter': 2, 'eps': 0.06},
{'year': 2023, 'quarter': 3, 'eps': 1.47},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(do_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(do_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'frontkn'
settings = {
"hyperparameters": {
"external_signals": {
"export_price_semicon": {
"date_col":"ds",
"value_col":"export_price_semicon",
"lags":0
},
"cpi": {
"date_col":"ds",
"value_col":"cpi",
"lags":0
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":10
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":0
},
"external_signals_combo": [
[],
[
"export_price_semicon"
],
[
"cpi"
],
[
"cboe_volatility_index"
],
[
"manufacturers_inventories_to_shipments"
],
[
"export_price_semicon",
"cpi",
"cboe_volatility_index",
"manufacturers_inventories_to_shipments"
]
]
}
}
}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset = split_train_test_lstm(data, look_back=1)
y_scaler, train_size, test_size, train, test, min_max_scaler, train_features_scaled, test_features_scaled, train_scaled, test_scaled, x_train, y_train, x_test, y_test, trainPredict, testPredict, model, features_key = train_predict_lstm_exsig(data, look_back=1)
train, test, train_pred, test_pred, df_plot_train, df_plot_test = plot_prediction_static_lstm(y_scaler, y_train, y_test, trainPredict, testPredict, stockname)
frontkn_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.6},
{'year': 2018, 'quarter': 2, 'eps': 1.15},
{'year': 2018, 'quarter': 3, 'eps': 1.45},
{'year': 2018, 'quarter': 4, 'eps': 1.78},
{'year': 2019, 'quarter': 1, 'eps': 1.47},
{'year': 2019, 'quarter': 2, 'eps': 1.58},
{'year': 2019, 'quarter': 3, 'eps': 1.82},
{'year': 2019, 'quarter': 4, 'eps': 1.74},
{'year': 2020, 'quarter': 1, 'eps': 1.62},
{'year': 2020, 'quarter': 2, 'eps': 1.94},
{'year': 2020, 'quarter': 3, 'eps': 2.04},
{'year': 2020, 'quarter': 4, 'eps': 2.22},
{'year': 2021, 'quarter': 1, 'eps': 2.19},
{'year': 2021, 'quarter': 2, 'eps': 1.57},
{'year': 2021, 'quarter': 3, 'eps': 1.74},
{'year': 2021, 'quarter': 4, 'eps': 1.88},
{'year': 2022, 'quarter': 1, 'eps': 1.69},
{'year': 2022, 'quarter': 2, 'eps': 2.05},
{'year': 2022, 'quarter': 3, 'eps': 2.21},
{'year': 2022, 'quarter': 4, 'eps': 1.9},
{'year': 2023, 'quarter': 1, 'eps': 1.7},
{'year': 2023, 'quarter': 2, 'eps': 2.03},
{'year': 2023, 'quarter': 3, 'eps': 1.5},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(frontkn_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(frontkn_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train, df_eps_test, stockname)
plot_decision_interactive(df_eps_train, df_eps_test, stockname)
stockname = 'inari'
settings = {
"hyperparameters": {
"external_signals": {
"industrial_production_semicon": {
"date_col":"ds",
"value_col":"industrial_production_semicon",
"lags":0
},
"export_price_index_harmonized": {
"date_col":"ds",
"value_col":"export_price_index_harmonized",
"lags":1
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":10
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":1
},
"external_signals_combo": [
[],
[
"industrial_production_semicon"
],
[
"export_price_index_harmonized"
],
[
"cboe_volatility_index"
],
[
"manufacturers_inventories_to_shipments"
],
[
"industrial_production_semicon",
"export_price_index_harmonized",
"cboe_volatility_index",
"manufacturers_inventories_to_shipments"
]
]
}
}
}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset = split_train_test_lstm(data, look_back=1)
y_scaler, train_size, test_size, train, test, min_max_scaler, train_features_scaled, test_features_scaled, train_scaled, test_scaled, x_train, y_train, x_test, y_test, trainPredict, testPredict, model, features_key = train_predict_lstm_exsig(data, look_back=1)
train, test, train_pred, test_pred, df_plot_train, df_plot_test = plot_prediction_static_lstm(y_scaler, y_train, y_test, trainPredict, testPredict, stockname)
inari_eps = [
{'year': 2018, 'quarter': 1, 'eps': 3.41},
{'year': 2018, 'quarter': 2, 'eps': 3.37},
{'year': 2018, 'quarter': 3, 'eps': 2.63},
{'year': 2018, 'quarter': 4, 'eps': 1.8},
{'year': 2019, 'quarter': 1, 'eps': 1.91},
{'year': 2019, 'quarter': 2, 'eps': 1.73},
{'year': 2019, 'quarter': 3, 'eps': 1.2},
{'year': 2019, 'quarter': 4, 'eps': 1.19},
{'year': 2020, 'quarter': 1, 'eps': 1.5},
{'year': 2020, 'quarter': 2, 'eps': 1.18},
{'year': 2020, 'quarter': 3, 'eps': 1.08},
{'year': 2020, 'quarter': 4, 'eps': 1.09},
{'year': 2021, 'quarter': 1, 'eps': 2.15},
{'year': 2021, 'quarter': 2, 'eps': 2.74},
{'year': 2021, 'quarter': 3, 'eps': 2.47},
{'year': 2021, 'quarter': 4, 'eps': 2.64},
{'year': 2022, 'quarter': 1, 'eps': 2.98},
{'year': 2022, 'quarter': 2, 'eps': 2.91},
{'year': 2022, 'quarter': 3, 'eps': 2.44},
{'year': 2022, 'quarter': 4, 'eps': 2.33},
{'year': 2023, 'quarter': 1, 'eps': 2.86},
{'year': 2023, 'quarter': 2, 'eps': 2.51},
{'year': 2023, 'quarter': 3, 'eps': 1.54},
{'year': 2023, 'quarter': 4, 'eps': 1.78},
{'year': 2024, 'quarter': 1, 'eps': 2.27},
]
df_eps_train = pd.DataFrame(inari_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(inari_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train, df_eps_test, stockname)
plot_decision_interactive(df_eps_train, df_eps_test, stockname)
stockname = 'jhm'
settings = {
"hyperparameters": {
"external_signals": {
"external_signals_combo": [
[],
]
}
}
}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset = split_train_test_lstm(data, look_back=3)
trainPredict, trainY, testPredict, testY = train_predict_lstm_lookback(x_train, y_train, x_test, y_test, min_max_scaler, dataset, stockname, look_back=3)
jhm_eps = [
{'year': 2018, 'quarter': 1, 'eps': 1.43},
{'year': 2018, 'quarter': 2, 'eps': 1.51},
{'year': 2018, 'quarter': 3, 'eps': 1.73},
{'year': 2018, 'quarter': 4, 'eps': 2.07},
{'year': 2019, 'quarter': 1, 'eps': 1.5},
{'year': 2019, 'quarter': 2, 'eps': 1.29},
{'year': 2019, 'quarter': 3, 'eps': 1.29},
{'year': 2019, 'quarter': 4, 'eps': 1.39},
{'year': 2020, 'quarter': 1, 'eps': 0.95},
{'year': 2020, 'quarter': 2, 'eps': 0.49},
{'year': 2020, 'quarter': 3, 'eps': 0.95},
{'year': 2020, 'quarter': 4, 'eps': 1.44},
{'year': 2021, 'quarter': 1, 'eps': 1.62},
{'year': 2021, 'quarter': 2, 'eps': 1.67},
{'year': 2021, 'quarter': 3, 'eps': 0.58},
{'year': 2021, 'quarter': 4, 'eps': 2.32},
{'year': 2022, 'quarter': 1, 'eps': 1.73},
{'year': 2022, 'quarter': 2, 'eps': 1.75},
{'year': 2022, 'quarter': 3, 'eps': 0.17},
{'year': 2022, 'quarter': 4, 'eps': 0.39},
{'year': 2023, 'quarter': 1, 'eps': 0.29},
{'year': 2023, 'quarter': 2, 'eps': 1.11},
{'year': 2023, 'quarter': 3, 'eps': 0.89},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(jhm_eps)
df_eps_test = pd.DataFrame(jhm_eps)
df_eps_train = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train, df_eps_test, stockname)
plot_decision_interactive(df_eps_train, df_eps_test, stockname)
stockname = 'kesm'
settings = {
"hyperparameters": {
"external_signals": {
"capacity_utilization_semicon": {
"date_col":"ds",
"value_col":"capacity_utilization_semicon",
"lags":0
},
"import_price_semicon": {
"date_col":"ds",
"value_col":"import_price_semicon",
"lags":10
},
"ppi_semicon": {
"date_col":"ds",
"value_col":"ppi_semicon",
"lags":3
},
"myr_to_usd": {
"date_col":"ds",
"value_col":"myr_to_usd",
"lags":0
},
"industrial_production": {
"date_col":"ds",
"value_col":"industrial_production",
"lags":8
},
"all_employees_semicon": {
"date_col":"ds",
"value_col":"all_employees_semicon",
"lags":0
},
"import_price_index_harmonized": {
"date_col":"ds",
"value_col":"import_price_index_harmonized",
"lags":5
},
"external_signals_combo": [
[],
[
"capacity_utilization_semicon"
],
[
"import_price_semicon"
],
[
"ppi_semicon"
],
[
"myr_to_usd"
],
[
"industrial_production"
],
[
"all_employees_semicon"
],
[
"import_price_index_harmonized"
],
[
"capacity_utilization_semicon",
"import_price_semicon",
"ppi_semicon",
"myr_to_usd",
"industrial_production",
"all_employees_semicon",
"import_price_index_harmonized",
]
]
}
}
}
random_grid = {'n_estimators':100}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
kesm_eps = [
{'year': 2018, 'quarter': 1, 'eps': 26.4},
{'year': 2018, 'quarter': 2, 'eps': 26.0},
{'year': 2018, 'quarter': 3, 'eps': 12.7},
{'year': 2018, 'quarter': 4, 'eps': 26.3},
{'year': 2019, 'quarter': 1, 'eps': 6.1},
{'year': 2019, 'quarter': 2, 'eps': 1.1},
{'year': 2019, 'quarter': 3, 'eps': 2.0},
{'year': 2019, 'quarter': 4, 'eps': 5.33},
{'year': 2020, 'quarter': 1, 'eps': 10.53},
{'year': 2020, 'quarter': 2, 'eps': 4.34},
{'year': 2020, 'quarter': 3, 'eps': -7.02},
{'year': 2020, 'quarter': 4, 'eps': -7.63},
{'year': 2021, 'quarter': 1, 'eps': 1.78},
{'year': 2021, 'quarter': 2, 'eps': 14.72},
{'year': 2021, 'quarter': 3, 'eps': 2.08},
{'year': 2021, 'quarter': 4, 'eps': -1.52},
{'year': 2022, 'quarter': 1, 'eps': 17.5},
{'year': 2022, 'quarter': 2, 'eps': -2.35},
{'year': 2022, 'quarter': 3, 'eps': -5.4},
{'year': 2022, 'quarter': 4, 'eps': -5.82},
{'year': 2023, 'quarter': 1, 'eps': -3.58},
{'year': 2023, 'quarter': 2, 'eps': -1.67},
{'year': 2023, 'quarter': 3, 'eps': -2.76},
{'year': 2023, 'quarter': 4, 'eps': 0.73},
{'year': 2024, 'quarter': 1, 'eps': 2.13}
]
df_eps_train = pd.DataFrame(kesm_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-1) #kesm only shift 1
df_eps_test = pd.DataFrame(kesm_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-1)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'keyasic'
settings = {
"hyperparameters": {
"external_signals": {
"external_signals_combo": [
[],
]
}
}
}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset = split_train_test_lstm(data, look_back=3)
trainPredict, trainY, testPredict, testY = train_predict_lstm_lookback(x_train, y_train, x_test, y_test, min_max_scaler, dataset, stockname, look_back=3)
keyasic_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.04},
{'year': 2018, 'quarter': 2, 'eps': 0.07},
{'year': 2018, 'quarter': 3, 'eps': 0.11},
{'year': 2018, 'quarter': 4, 'eps': 0.17},
{'year': 2019, 'quarter': 1, 'eps': 0.15},
{'year': 2019, 'quarter': 2, 'eps': 0.03},
{'year': 2019, 'quarter': 3, 'eps': -0.09},
{'year': 2019, 'quarter': 4, 'eps': -0.3},
{'year': 2020, 'quarter': 1, 'eps': -0.16},
{'year': 2020, 'quarter': 2, 'eps': -0.21},
{'year': 2020, 'quarter': 3, 'eps': -0.21},
{'year': 2020, 'quarter': 4, 'eps': -1.17},
{'year': 2021, 'quarter': 1, 'eps': -0.1},
{'year': 2021, 'quarter': 2, 'eps': -0.25},
{'year': 2021, 'quarter': 3, 'eps': -0.15},
{'year': 2021, 'quarter': 4, 'eps': -0.17},
{'year': 2022, 'quarter': 1, 'eps': -0.04},
{'year': 2022, 'quarter': 2, 'eps': -0.12},
{'year': 2022, 'quarter': 3, 'eps': -0.11},
{'year': 2022, 'quarter': 4, 'eps': -0.39},
{'year': 2023, 'quarter': 1, 'eps': -0.15},
{'year': 2023, 'quarter': 2, 'eps': -0.28},
{'year': 2023, 'quarter': 3, 'eps': 0},
{'year': 2023, 'quarter': 4, 'eps': 0.03},
{'year': 2024, 'quarter': 1, 'eps': -0.23}
]
df_eps_train = pd.DataFrame(keyasic_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(keyasic_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train, df_eps_test, stockname)
plot_decision_interactive(df_eps_train, df_eps_test, stockname)
stockname = 'mpi'
settings = {
"hyperparameters": {
"external_signals": {
"external_signals_combo": [
[],
]
}
}
}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, min_max_scaler, train_size, test_size, train, test, x_train, y_train, x_test, y_test, dataset = split_train_test_lstm(data, look_back=1)
trainPredict, trainY, testPredict, testY = train_predict_lstm_lookback(x_train, y_train, x_test, y_test, min_max_scaler, dataset, stockname, look_back=1)
mpi_eps = [
{'year': 2018, 'quarter': 1, 'eps': 19.08},
{'year': 2018, 'quarter': 2, 'eps': 21.69},
{'year': 2018, 'quarter': 3, 'eps': 13.68},
{'year': 2018, 'quarter': 4, 'eps': 20.55},
{'year': 2019, 'quarter': 1, 'eps': 22.25},
{'year': 2019, 'quarter': 2, 'eps': 20.64},
{'year': 2019, 'quarter': 3, 'eps': 8.82},
{'year': 2019, 'quarter': 4, 'eps': 15.82},
{'year': 2020, 'quarter': 1, 'eps': 19.36},
{'year': 2020, 'quarter': 2, 'eps': 23.73},
{'year': 2020, 'quarter': 3, 'eps': 11.59},
{'year': 2020, 'quarter': 4, 'eps': 25.73},
{'year': 2021, 'quarter': 1, 'eps': 28.05},
{'year': 2021, 'quarter': 2, 'eps': 33.87},
{'year': 2021, 'quarter': 3, 'eps': 37.55},
{'year': 2021, 'quarter': 4, 'eps': 37.85},
{'year': 2022, 'quarter': 1, 'eps': 41.17},
{'year': 2022, 'quarter': 2, 'eps': 43.0},
{'year': 2022, 'quarter': 3, 'eps': 40.98},
{'year': 2022, 'quarter': 4, 'eps': 40.47},
{'year': 2023, 'quarter': 1, 'eps': 26.5},
{'year': 2023, 'quarter': 2, 'eps': 9.22},
{'year': 2023, 'quarter': 3, 'eps': -8.97},
{'year': 2023, 'quarter': 4, 'eps': 4.09},
{'year': 2024, 'quarter': 1, 'eps': 8.31}
]
df_eps_train = pd.DataFrame(mpi_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-2)
df_eps_test = pd.DataFrame(mpi_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-2)
df_eps_train = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train, df_eps_test, stockname)
plot_decision_interactive(df_eps_train, df_eps_test, stockname)
stockname = 'mqtech'
settings = {
"hyperparameters": {
"external_signals": {
"import_price_semicon": {
"date_col":"ds",
"value_col":"import_price_semicon",
"lags":11
},
"ppi_semicon": {
"date_col":"ds",
"value_col":"ppi_semicon",
"lags":0
},
"industrial_production": {
"date_col":"ds",
"value_col":"industrial_production",
"lags":4
},
"all_employees_semicon": {
"date_col":"ds",
"value_col":"all_employees_semicon",
"lags":0
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":5
},
"external_signals_combo": [
[],
[
"import_price_semicon"
],
[
"ppi_semicon"
],
[
"industrial_production"
],
[
"all_employees_semicon"
],
[
"cboe_volatility_index"
],
[
"import_price_semicon",
"ppi_semicon",
"industrial_production",
"all_employees_semicon",
"cboe_volatility_index",
]
]
}
}
}
random_grid = {'n_estimators':100}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
mqtech_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.04},
{'year': 2018, 'quarter': 2, 'eps': -0.29},
{'year': 2018, 'quarter': 3, 'eps': -0.16},
{'year': 2018, 'quarter': 4, 'eps': -0.66},
{'year': 2019, 'quarter': 1, 'eps': -0.12},
{'year': 2019, 'quarter': 2, 'eps': -0.35},
{'year': 2019, 'quarter': 3, 'eps': -0.09},
{'year': 2019, 'quarter': 4, 'eps': -0.61},
{'year': 2020, 'quarter': 1, 'eps': -0.11},
{'year': 2020, 'quarter': 2, 'eps': -0.96},
{'year': 2020, 'quarter': 3, 'eps': -0.07},
{'year': 2020, 'quarter': 4, 'eps': 0.01},
{'year': 2021, 'quarter': 1, 'eps': -0.03},
{'year': 2021, 'quarter': 2, 'eps': 0.04},
{'year': 2021, 'quarter': 3, 'eps': -0.11},
{'year': 2021, 'quarter': 4, 'eps': 0.02},
{'year': 2022, 'quarter': 1, 'eps': 0},
{'year': 2022, 'quarter': 2, 'eps': 0.1},
{'year': 2022, 'quarter': 3, 'eps': 0.06},
{'year': 2022, 'quarter': 4, 'eps': -0.05},
{'year': 2023, 'quarter': 1, 'eps': -0.24},
{'year': 2023, 'quarter': 2, 'eps': -0.33},
{'year': 2023, 'quarter': 3, 'eps': -0.48},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(mqtech_eps)
df_eps_test = pd.DataFrame(mqtech_eps)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'turiya'
settings = {
"hyperparameters": {
"external_signals": {
"industrial_production": {
"date_col":"ds",
"value_col":"industrial_production",
"lags":10
},
"export_price_index_harmonized": {
"date_col":"ds",
"value_col":"export_price_index_harmonized",
"lags":2
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":5
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":10
},
"external_signals_combo": [
[],
[
"industrial_production"
],
[
"export_price_index_harmonized"
],
[
"cboe_volatility_index"
],
[
"manufacturers_inventories_to_shipments"
],
[
"industrial_production",
"export_price_index_harmonized",
"cboe_volatility_index",
"manufacturers_inventories_to_shipments",
]
]
}
}
}
random_grid = {'n_estimators':100}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
turiya_eps = [
{'year': 2018, 'quarter': 1, 'eps': -0.1},
{'year': 2018, 'quarter': 2, 'eps': 0.35},
{'year': 2018, 'quarter': 3, 'eps': -0.08},
{'year': 2018, 'quarter': 4, 'eps': -1.34},
{'year': 2019, 'quarter': 1, 'eps': 0.54},
{'year': 2019, 'quarter': 2, 'eps': -0.05},
{'year': 2019, 'quarter': 3, 'eps': -0.74},
{'year': 2019, 'quarter': 4, 'eps': -0.43},
{'year': 2020, 'quarter': 1, 'eps': 0.03},
{'year': 2020, 'quarter': 2, 'eps': 0.11},
{'year': 2020, 'quarter': 3, 'eps': -0.01},
{'year': 2020, 'quarter': 4, 'eps': 0.08},
{'year': 2021, 'quarter': 1, 'eps': 0.15},
{'year': 2021, 'quarter': 2, 'eps': 0.02},
{'year': 2021, 'quarter': 3, 'eps': 0.22},
{'year': 2021, 'quarter': 4, 'eps': -0.13},
{'year': 2022, 'quarter': 1, 'eps': 0.32},
{'year': 2022, 'quarter': 2, 'eps': 0.23},
{'year': 2022, 'quarter': 3, 'eps': 0.08},
{'year': 2022, 'quarter': 4, 'eps': 0.18},
{'year': 2023, 'quarter': 1, 'eps': 0.43},
{'year': 2023, 'quarter': 2, 'eps': 0.26},
{'year': 2023, 'quarter': 3, 'eps': 0.30},
{'year': 2023, 'quarter': 4, 'eps': 0.77},
{'year': 2024, 'quarter': 1, 'eps': 0.34},
{'year': 2024, 'quarter': 1, 'eps': 0.33}
]
df_eps_train = pd.DataFrame(turiya_eps)
df_eps_train['eps'] = df_eps_train['eps'].shift(-3)
df_eps_test = pd.DataFrame(turiya_eps)
df_eps_test['eps'] = df_eps_test['eps'].shift(-3)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'unisem'
settings = {
"hyperparameters": {
"external_signals": {
"industrial_production": {
"date_col":"ds",
"value_col":"industrial_production",
"lags":9
},
"export_price_index_harmonized": {
"date_col":"ds",
"value_col":"export_price_index_harmonized",
"lags":0
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":10
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":10
},
"external_signals_combo": [
[],
[
"industrial_production"
],
[
"export_price_index_harmonized"
],
[
"cboe_volatility_index"
],
[
"manufacturers_inventories_to_shipments"
],
[
"industrial_production",
"export_price_index_harmonized",
"cboe_volatility_index",
"manufacturers_inventories_to_shipments",
]
]
}
}
}
random_grid = {'n_estimators': 1600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 70, 'bootstrap': False}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
unisem_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.82},
{'year': 2018, 'quarter': 2, 'eps': 4.26},
{'year': 2018, 'quarter': 3, 'eps': 4.83},
{'year': 2018, 'quarter': 4, 'eps': 3.23},
{'year': 2019, 'quarter': 1, 'eps': 0.83},
{'year': 2019, 'quarter': 2, 'eps': 1.99},
{'year': 2019, 'quarter': 3, 'eps': -0.44},
{'year': 2019, 'quarter': 4, 'eps': -3.69},
{'year': 2020, 'quarter': 1, 'eps': -0.39},
{'year': 2020, 'quarter': 2, 'eps': 4.67},
{'year': 2020, 'quarter': 3, 'eps': 6.98},
{'year': 2020, 'quarter': 4, 'eps': 8.18},
{'year': 2021, 'quarter': 1, 'eps': 5.73},
{'year': 2021, 'quarter': 2, 'eps': 6.8},
{'year': 2021, 'quarter': 3, 'eps': 4.99},
{'year': 2021, 'quarter': 4, 'eps': 3.56},
{'year': 2022, 'quarter': 1, 'eps': 3.14},
{'year': 2022, 'quarter': 2, 'eps': 12.76},
{'year': 2022, 'quarter': 3, 'eps': 3.83},
{'year': 2022, 'quarter': 4, 'eps': 4.16},
{'year': 2023, 'quarter': 1, 'eps': 0.61},
{'year': 2023, 'quarter': 2, 'eps': 1.48},
{'year': 2023, 'quarter': 3, 'eps': 1.12},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(unisem_eps)
df_eps_test = pd.DataFrame(unisem_eps)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'vis'
settings = {
"hyperparameters": {
"external_signals": {
"industrial_production": {
"date_col":"ds",
"value_col":"industrial_production",
"lags":10
},
"cboe_volatility_index": {
"date_col":"ds",
"value_col":"cboe_volatility_index",
"lags":11
},
"external_signals_combo": [
[],
[
"industrial_production"
],
[
"cboe_volatility_index"
],
[
"industrial_production",
"cboe_volatility_index"
]
]
}
}
}
random_grid = {'n_estimators': 600, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 110, 'bootstrap': False}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
vis_eps = [
{'year': 2018, 'quarter': 1, 'eps': 0.26},
{'year': 2018, 'quarter': 2, 'eps': 1.18},
{'year': 2018, 'quarter': 3, 'eps': 1.62},
{'year': 2018, 'quarter': 4, 'eps': 1.79},
{'year': 2019, 'quarter': 1, 'eps': -0.26},
{'year': 2019, 'quarter': 2, 'eps': 0.44},
{'year': 2019, 'quarter': 3, 'eps': 0.59},
{'year': 2019, 'quarter': 4, 'eps': 2.23},
{'year': 2020, 'quarter': 1, 'eps': -1},
{'year': 2020, 'quarter': 2, 'eps': 0.66},
{'year': 2020, 'quarter': 3, 'eps': -0.5},
{'year': 2020, 'quarter': 4, 'eps': 2.26},
{'year': 2021, 'quarter': 1, 'eps': 1.48},
{'year': 2021, 'quarter': 2, 'eps': 0.65},
{'year': 2021, 'quarter': 3, 'eps': 2.32},
{'year': 2021, 'quarter': 4, 'eps': 1.09},
{'year': 2022, 'quarter': 1, 'eps': 1.59},
{'year': 2022, 'quarter': 2, 'eps': 1.8},
{'year': 2022, 'quarter': 3, 'eps': 0.77},
{'year': 2022, 'quarter': 4, 'eps': 3.12},
{'year': 2023, 'quarter': 1, 'eps': -1.92},
{'year': 2023, 'quarter': 2, 'eps': 1.49},
{'year': 2023, 'quarter': 3, 'eps': 0.67},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(vis_eps)
df_eps_test = pd.DataFrame(vis_eps)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)
stockname = 'vitrox'
settings = {
"hyperparameters": {
"external_signals": {
"export_price_index_harmonized": {
"date_col":"ds",
"value_col":"export_price_index_harmonized",
"lags":0
},
"manufacturers_inventories_to_shipments": {
"date_col":"ds",
"value_col":"manufacturers_inventories_to_shipments",
"lags":0
},
"external_signals_combo": [
[],
[
"export_price_index_harmonized"
],
[
"manufacturers_inventories_to_shipments"
],
[
"export_price_index_harmonized",
"manufacturers_inventories_to_shipments"
]
]
}
}
}
random_grid = {'n_estimators':100}
data = getDataset(stockname=stockname, settings=settings)
rel_columns, X, y, X_train, X_test, y_train, y_test, min_max_scaler, X_train_scaled, X_test_scaled = split_train_test_rf(settings, data)
rf, predictions_train, predictions_test = train_predict_rf_tuned(X_train_scaled, y_train, X_test_scaled, y_test, random_grid)
df_plot_train, df_plot_test = plot_tuned_prediction_static_rf(X_train_scaled, y_train, predictions_train, X_test_scaled,
y_test, predictions_test, stockname)
vitrox_eps = [
{'year': 2018, 'quarter': 1, 'eps': 4.31},
{'year': 2018, 'quarter': 2, 'eps': 5.9},
{'year': 2018, 'quarter': 3, 'eps': 5.96},
{'year': 2018, 'quarter': 4, 'eps': 6.26},
{'year': 2019, 'quarter': 1, 'eps': 5.02},
{'year': 2019, 'quarter': 2, 'eps': 5.18},
{'year': 2019, 'quarter': 3, 'eps': 2.94},
{'year': 2019, 'quarter': 4, 'eps': 3.78},
{'year': 2020, 'quarter': 1, 'eps': 4.47},
{'year': 2020, 'quarter': 2, 'eps': 4.87},
{'year': 2020, 'quarter': 3, 'eps': 6.3},
{'year': 2020, 'quarter': 4, 'eps': 6.77},
{'year': 2021, 'quarter': 1, 'eps': 6.5},
{'year': 2021, 'quarter': 2, 'eps': 10.73},
{'year': 2021, 'quarter': 3, 'eps': 8.94},
{'year': 2021, 'quarter': 4, 'eps': 9.77},
{'year': 2022, 'quarter': 1, 'eps': 5.3},
{'year': 2022, 'quarter': 2, 'eps': 5.44},
{'year': 2022, 'quarter': 3, 'eps': 5.38},
{'year': 2022, 'quarter': 4, 'eps': 5.14},
{'year': 2023, 'quarter': 1, 'eps': 3.49},
{'year': 2023, 'quarter': 2, 'eps': 3.98},
{'year': 2023, 'quarter': 3, 'eps': 3.52},
{'year': 2023, 'quarter': 4, 'eps': 0},
{'year': 2024, 'quarter': 1, 'eps': 0}
]
df_eps_train = pd.DataFrame(vitrox_eps)
df_eps_test = pd.DataFrame(vitrox_eps)
df_eps_train_ = calculate_pe_ratio_train(df_plot_train, df_eps_train)
df_eps_test_ = calculate_pe_ratio_test(df_plot_test, df_eps_test)
plot_decision_static(df_eps_train_, df_eps_test_, stockname)
plot_decision_interactive(df_eps_train_, df_eps_test_, stockname)